1 Get Data Ready

1.1 Repeat the EDA part

<U+393C><U+3E31>lubridate<U+393C><U+3E32> namespace cannot be unloaded:
  namespace <U+393C><U+3E31>lubridate<U+393C><U+3E32> is imported by <U+393C><U+3E31>tidyverse<U+393C><U+3E32> so cannot be unloaded<U+393C><U+3E31>scales<U+393C><U+3E32> namespace cannot be unloaded:
  namespace <U+393C><U+3E31>scales<U+393C><U+3E32> is imported by <U+393C><U+3E31>kableExtra<U+393C><U+3E32>, <U+393C><U+3E31>ggplot2<U+393C><U+3E32> so cannot be unloaded<U+393C><U+3E31>forcats<U+393C><U+3E32> namespace cannot be unloaded:
  namespace <U+393C><U+3E31>forcats<U+393C><U+3E32> is imported by <U+393C><U+3E31>haven<U+393C><U+3E32>, <U+393C><U+3E31>tidyverse<U+393C><U+3E32> so cannot be unloaded<U+393C><U+3E31>stringr<U+393C><U+3E32> namespace cannot be unloaded:
  namespace <U+393C><U+3E31>stringr<U+393C><U+3E32> is imported by <U+393C><U+3E31>tidyverse<U+393C><U+3E32>, <U+393C><U+3E31>lubridate<U+393C><U+3E32> so cannot be unloaded<U+393C><U+3E31>dplyr<U+393C><U+3E32> namespace cannot be unloaded:
  namespace <U+393C><U+3E31>dplyr<U+393C><U+3E32> is imported by <U+393C><U+3E31>broom<U+393C><U+3E32> so cannot be unloaded<U+393C><U+3E31>purrr<U+393C><U+3E32> namespace cannot be unloaded:
  namespace <U+393C><U+3E31>purrr<U+393C><U+3E32> is imported by <U+393C><U+3E31>tidyselect<U+393C><U+3E32>, <U+393C><U+3E31>modelr<U+393C><U+3E32>, <U+393C><U+3E31>broom<U+393C><U+3E32>, <U+393C><U+3E31>tidyr<U+393C><U+3E32> so cannot be unloaded<U+393C><U+3E31>readr<U+393C><U+3E32> namespace cannot be unloaded:
  namespace <U+393C><U+3E31>readr<U+393C><U+3E32> is imported by <U+393C><U+3E31>tidyverse<U+393C><U+3E32> so cannot be unloaded<U+393C><U+3E31>tidyr<U+393C><U+3E32> namespace cannot be unloaded:
  namespace <U+393C><U+3E31>tidyr<U+393C><U+3E32> is imported by <U+393C><U+3E31>tidyverse<U+393C><U+3E32>, <U+393C><U+3E31>broom<U+393C><U+3E32> so cannot be unloaded<U+393C><U+3E31>tibble<U+393C><U+3E32> namespace cannot be unloaded:
  namespace <U+393C><U+3E31>tibble<U+393C><U+3E32> is imported by <U+393C><U+3E31>haven<U+393C><U+3E32>, <U+393C><U+3E31>dplyr<U+393C><U+3E32>, <U+393C><U+3E31>modelr<U+393C><U+3E32>, <U+393C><U+3E31>broom<U+393C><U+3E32>, <U+393C><U+3E31>readr<U+393C><U+3E32>, <U+393C><U+3E31>ggplot2<U+393C><U+3E32>, <U+393C><U+3E31>tidyr<U+393C><U+3E32> so cannot be unloaded<U+393C><U+3E31>ggplot2<U+393C><U+3E32> namespace cannot be unloaded:
  namespace <U+393C><U+3E31>ggplot2<U+393C><U+3E32> is imported by <U+393C><U+3E31>tidyverse<U+393C><U+3E32> so cannot be unloaded
[[1]]
NULL

[[2]]
NULL

[[3]]
NULL

[[4]]
NULL

[[5]]
NULL

[[6]]
NULL

[[7]]
NULL

[[8]]
NULL

[[9]]
NULL

[[10]]
NULL

[[11]]
NULL

[[12]]
NULL

[[13]]
NULL

[[14]]
NULL

[[15]]
NULL

[[16]]
NULL

[[17]]
NULL

[[18]]
NULL
package <U+393C><U+3E31>kableExtra<U+393C><U+3E32> was built under R version 3.5.2package <U+393C><U+3E31>formattable<U+393C><U+3E32> was built under R version 3.5.2package <U+393C><U+3E31>pastecs<U+393C><U+3E32> was built under R version 3.5.3package <U+393C><U+3E31>directlabels<U+393C><U+3E32> was built under R version 3.5.3package <U+393C><U+3E31>Metrics<U+393C><U+3E32> was built under R version 3.5.3
number of columns of result is not a multiple of vector length (arg 1)33566 parsing failures.
row # A tibble: 5 x 5 col     row col                  expected              actual file                expected   <int> <chr>                <chr>                 <chr>  <chr>               actual 1  1038 cummulative_gross_r~ no trailing characte~ .3     'BOOKINGS_ATLCP.cs~ file 2  1038 OTB_rev              no trailing characte~ .3     'BOOKINGS_ATLCP.cs~ row 3  1038 OTB_rev_to_survive   no trailing characte~ .3     'BOOKINGS_ATLCP.cs~ col 4  1039 cummulative_gross_r~ no trailing characte~ .3     'BOOKINGS_ATLCP.cs~ expected 5  1039 OTB_rev              no trailing characte~ .3     'BOOKINGS_ATLCP.cs~
... ................................. ... ............................................................................. ........ .......................................................................................................................................................................................................... ...... ...................................................................................................... .... ...................................................................................................... ... ...................................................................................................... ... ...................................................................................................... ........ ......................................................................................................
See problems(...) for more details.
number of columns of result is not a multiple of vector length (arg 1)31164 parsing failures.
row # A tibble: 5 x 5 col     row col                 expected               actual file                 expected   <int> <chr>               <chr>                  <chr>  <chr>                actual 1  2136 cummulative_cxl_rev no trailing characters .1     'BOOKINGS_NYCHA.csv' file 2  2137 cummulative_cxl_rev no trailing characters .1     'BOOKINGS_NYCHA.csv' row 3  2138 daily_cxl_rev       no trailing characters .1     'BOOKINGS_NYCHA.csv' col 4  2138 cummulative_cxl_rev no trailing characters .1     'BOOKINGS_NYCHA.csv' expected 5  2139 OTB_rev_to_be_cxl   no trailing characters .1     'BOOKINGS_NYCHA.csv'
... ................................. ... .............................................................................. ........ ........................................................................................................................................................................................................... ...... ....................................................................................................... .... ....................................................................................................... ... ....................................................................................................... ... ....................................................................................................... ........ .......................................................................................................
See problems(...) for more details.
# change date fields to date
hot1$stay_dt <- as.Date(hot1$stay_dt, "%m/%d/%Y")
hot1$booking_dt <- as.Date(hot1$booking_dt, "%m/%d/%Y")
hot2$stay_dt <- as.Date(hot2$stay_dt, "%m/%d/%Y")
hot2$booking_dt <- as.Date(hot2$booking_dt, "%m/%d/%Y")
#create field for non-numerical dow
hot1$day_of_week <- as.factor(hot1$dow)
hot2$day_of_week <- as.factor(hot2$dow)
# Reserve last 3 weeks data as Validation Data and the prior are Training Data
h1_train <- hot1[hot1$stay_dt <= (max(hot1$stay_dt) - 21),]
h2_train <- hot2[hot2$stay_dt <= (max(hot2$stay_dt) - 21),]

1.1.1 Hotel 1 - ATL

#add price and otb_cxl_rate to h1_train
h1_train <-  h1_train %>%
  filter(OTB!=0) %>% 
  mutate(price = OTB_rev/OTB, OTB_cxl_rate = OTB_to_be_cxl/OTB)
h1_train <- h1_train %>%
  filter(days_prior < 33)
h1_train <- h1_train %>%
  mutate(day_type = case_when(day_of_week == '7' ~ 'weekend',
                              day_of_week == '1' ~ 'weekend', 
                              TRUE ~'weekday'))
h1_train$month = month(h1_train$stay_dt)

1.1.1.1 Grouping method 1

# make this a group in h1_train
h1_train_1 <- h1_train %>%
      mutate(prod_group_perc = case_when(product_type == 'OPAQUE' ~ 'OP/FENC',
                                         product_type == 'FENCED' ~ 'OP/FENC',
                                         product_type == 'CORPORATE' ~ 'CORP/BUS',
                                         product_type == 'BUSINESS TRAVEL AGENCIES' ~ 'CORP/BUS',
                                         product_type == 'GROUP' ~ 'GROUP',
                                         product_type == 'MEMBERSHIP MARKETING' ~ 'MEM/OTHER',
                                         product_type == 'OTHER' ~ 'MEM/OTHER',
                                         product_type == 'UNFENCED' ~ 'GOV/UNFENC',
                                         product_type == 'GOVERNMENT' ~ 'GOV/UNFENC'))

1.1.1.2 Grouping method 2

#Create this group
h1_train_2 <- h1_train %>%
      mutate(prod_group_behav = case_when(product_type == 'OPAQUE' ~ 'OP/FENC/OTH',
                                         product_type == 'FENCED' ~ 'OP/FENC/OTH',
                                         product_type == 'CORPORATE' ~ 'CORPORATE',
                                         product_type == 'BUSINESS TRAVEL AGENCIES' ~ 'GOV/BUS/MEM',
                                         product_type == 'GROUP' ~ 'GROUP',
                                         product_type == 'MEMBERSHIP MARKETING' ~ 'GOV/BUS/MEM',
                                         product_type == 'OTHER' ~ 'OP/FENC/OTH',
                                         product_type == 'UNFENCED' ~ 'UNFENCED',
                                         product_type == 'GOVERNMENT' ~ 'GOV/BUS/MEM'))

1.1.1.3 Grouping method 3

#Create this group
h1_train_3 <- h1_train %>%
      mutate(prod_group_cxl_rate = case_when(product_type == 'OPAQUE' ~ 'GOV/UNFEN/OP',
                                         product_type == 'FENCED' ~ 'CORP/FEN/MEM',
                                         product_type == 'CORPORATE' ~ 'CORP/FEN/MEM',
                                         product_type == 'BUSINESS TRAVEL AGENCIES' ~ 'OTH/BTA',
                                         product_type == 'GROUP' ~ 'GROUP',
                                         product_type == 'MEMBERSHIP' ~ 'CORP/FEN/MEM',
                                         product_type == 'OTHER' ~ 'OTH/BTA',
                                         product_type == 'UNFENCED' ~ 'GOV/UNFEN/OP',
                                         product_type == 'GOVERNMENT' ~ 'GOV/UNFEN/OP'))

1.1.2 Hotel 2 - NY

#add price and otb_cxl_rate to h1_train
h2_train <-  h2_train %>%
  filter(OTB!=0) %>% 
  mutate(price = OTB_rev/OTB, OTB_cxl_rate = OTB_to_be_cxl/OTB)
h2_train <- h2_train %>%
  filter(days_prior < 32)
h2_train <- h2_train %>%
  mutate(day_type = case_when(day_of_week == '7' ~ 'weekend',
                              day_of_week == '1' ~ 'weekend', 
                              TRUE ~'weekday'))
h2_train$month = month(h2_train$stay_dt)

1.1.2.1 Grouping Method 1

#make these groups
h2_train_1 <- h2_train %>%
      mutate(prod_group_perc = case_when(product_type == 'OPAQUE' ~ 'OP/FENC/OTH',
                                         product_type == 'FENCED' ~ 'OP/FENC/OTH',
                                         product_type == 'CORPORATE' ~ 'CORP/GROUP/TACT',
                                         product_type == 'BUSINESS TRAVEL AGENCIES' ~ 'UNF/WHOLE/MEM/BUS',
                                         product_type == 'GROUP' ~ 'CORP/GROUP/TACT',
                                         product_type == 'TACTICAL MARKETING' ~ 'CORP/GROUP/TACT',
                                         product_type == 'MEMBERSHIP MARKETING' ~ 'UNF/WHOLE/MEM/BUS',
                                         product_type == 'OTHER' ~ 'OP/FENC/OTH',
                                         product_type == 'UNFENCED' ~ 'UNF/WHOLE/MEM/BUS',
                                         product_type == 'GOVERNMENT' ~ 'GOVERNMENT',
                                         product_type == 'WHOLESALE' ~ 'UNF/WHOLE/MEM/BUS'))

1.1.2.2 Grouping Method 2

#make this a group
h2_train_2 <- h2_train %>%
      mutate(prod_group_behav = case_when(product_type == 'OPAQUE' ~ 'OTH/OPA/FEN',
                                         product_type == 'FENCED' ~ 'OTH/OPA/FEN',
                                         product_type == 'CORPORATE' ~ 'CORP/MEMB/WHOLE',
                                         product_type == 'BUSINESS TRAVEL AGENCIES' ~ 'BTA/UNFEN/CORP/MM',
                                         product_type == 'GROUP' ~ 'GROUP',
                                         product_type == 'TACTICAL MARKETING' ~ 'TACT/WHOLE',
                                         product_type == 'MEMBERSHIP MARKETING' ~ 'BTA/UNFEN/CORP/MM',
                                         product_type == 'OTHER' ~ 'OTH/OPA/FEN',
                                         product_type == 'UNFENCED' ~ 'BTA/UNFEN/CORP/MM',
                                         product_type == 'GOVERNMENT' ~ 'GOVERNMENT',
                                         product_type == 'WHOLESALE' ~ 'TACT/WHOLE'))

2 Naive Modeling Begins Here

#get just the last three weeks for test data 
hot1 <-  hot1[hot1$stay_dt >(max(hot1$stay_dt) - 21),]
hot2 <-  hot2[hot2$stay_dt >(max(hot2$stay_dt) - 21),]
#truncate the days prior for test data
hot1 <- hot1 %>% 
          filter(days_prior < 33)
hot2 <- hot2 %>%
          filter(days_prior < 32)
#add price, cxl rate
hot1<-  hot1 %>%
  filter(OTB!=0) %>% 
  mutate(price = OTB_rev/OTB, OTB_cxl_rate = OTB_to_be_cxl/OTB)
hot2 <-  hot2 %>%
  filter(OTB!=0) %>% 
  mutate(price = OTB_rev/OTB, OTB_cxl_rate = OTB_to_be_cxl/OTB)
#add month
hot1$month = month(hot1$stay_dt)
hot2$month = month(hot2$stay_dt)
#add weekday/weekend
hot1 <- hot1 %>%
  mutate(day_type = case_when(day_of_week == '7' ~ 'weekend',
                              day_of_week == '1' ~ 'weekend', 
                              TRUE ~'weekday'))
hot2 <- hot2 %>%
  mutate(day_type = case_when(day_of_week == '7' ~ 'weekend',
                              day_of_week == '1' ~ 'weekend', 
                              TRUE ~'weekday'))

2.1 Hotel 1 - ATL

2.1.1 CXL Rate by days prior

# calculate cancellation rate just by days prior
a_dp <- h1_train %>%
      select(days_prior, OTB, OTB_to_be_cxl, OTB_cxl_rate) %>%
      group_by(days_prior) %>%
      summarise(predict_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
a_dp_test <- hot1 %>%
      select(days_prior, OTB, OTB_to_be_cxl, OTB_cxl_rate, OTB_cxl_rate) %>%
      group_by(days_prior) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
a_dp_mod <- left_join(a_dp, a_dp_test, by = 'days_prior')
a_dp_mod

2.1.2 CXL Rate by groups, days prior, and weekdays

# use weekdays and grouping one
# get test data ready
hot1_test_1 <- hot1 %>%
      mutate(prod_group_perc = case_when(product_type == 'OPAQUE' ~ 'OP/FENC',
                                         product_type == 'FENCED' ~ 'OP/FENC',
                                         product_type == 'CORPORATE' ~ 'CORP/BUS',
                                         product_type == 'BUSINESS TRAVEL AGENCIES' ~ 'CORP/BUS',
                                         product_type == 'GROUP' ~ 'GROUP',
                                         product_type == 'MEMBERSHIP MARKETING' ~ 'MEM/OTHER',
                                         product_type == 'OTHER' ~ 'MEM/OTHER',
                                         product_type == 'UNFENCED' ~ 'GOV/UNFENC',
                                         product_type == 'GOVERNMENT' ~ 'GOV/UNFENC'))
hot1_test_2 <- hot1 %>%
      mutate(prod_group_behav = case_when(product_type == 'OPAQUE' ~ 'OP/FENC/OTH',
                                         product_type == 'FENCED' ~ 'OP/FENC/OTH',
                                         product_type == 'CORPORATE' ~ 'CORPORATE',
                                         product_type == 'BUSINESS TRAVEL AGENCIES' ~ 'GOV/BUS/MEM',
                                         product_type == 'GROUP' ~ 'GROUP',
                                         product_type == 'MEMBERSHIP MARKETING' ~ 'GOV/BUS/MEM',
                                         product_type == 'OTHER' ~ 'OP/FENC/OTH',
                                         product_type == 'UNFENCED' ~ 'UNFENCED',
                                         product_type == 'GOVERNMENT' ~ 'GOV/BUS/MEM'))
hot1_test_3 <- hot1 %>%
      mutate(prod_group_cxl_rate = case_when(product_type == 'OPAQUE' ~ 'GOV/UNFEN/OP',
                                         product_type == 'FENCED' ~ 'CORP/FEN/MEM',
                                         product_type == 'CORPORATE' ~ 'CORP/FEN/MEM',
                                         product_type == 'BUSINESS TRAVEL AGENCIES' ~ 'OTH/BTA',
                                         product_type == 'GROUP' ~ 'GROUP',
                                         product_type == 'MEMBERSHIP MARKETING' ~ 'CORP/FEN/MEM',
                                         product_type == 'OTHER' ~ 'OTH/BTA',
                                         product_type == 'UNFENCED' ~ 'GOV/UNFEN/OP',
                                         product_type == 'GOVERNMENT' ~ 'GOV/UNFEN/OP'))
#group 1's CXL Rate by groups, days prior, and weekdays
a_dp_grp1_wd <- h1_train_1 %>%
      select(days_prior, prod_group_perc, day_of_week, OTB, OTB_to_be_cxl) %>%
      group_by(days_prior, prod_group_perc, day_of_week) %>%
      summarise(predict_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
a_dp_grp1_wd_test <- hot1_test_1 %>%
      select(days_prior, prod_group_perc, day_of_week, OTB, OTB_to_be_cxl) %>%
      group_by(days_prior, prod_group_perc, day_of_week) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
a_dp_grp1_wd_mod <- left_join(a_dp_grp1_wd, a_dp_grp1_wd_test, by = c('days_prior', 'prod_group_perc', 'day_of_week'))
a_dp_grp1_wd_mod
#group 2's CXL Rate by groups, days prior, and weekdays
a_dp_grp2_wd <- h1_train_2 %>%
      select(days_prior, prod_group_behav, day_of_week, OTB, OTB_to_be_cxl) %>%
      group_by(days_prior, prod_group_behav, day_of_week) %>%
      summarise(predict_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
a_dp_grp2_wd_test <- hot1_test_2 %>%
      select(days_prior, prod_group_behav, day_of_week, OTB, OTB_to_be_cxl) %>%
      group_by(days_prior, prod_group_behav, day_of_week) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
a_dp_grp2_wd_mod <- left_join(a_dp_grp2_wd, a_dp_grp2_wd_test, by = c('days_prior', 'prod_group_behav', 'day_of_week'))
a_dp_grp2_wd_mod
#group 3 and days prior
a_dp_grp3_wd <- h1_train_3 %>%
      select(days_prior, prod_group_cxl_rate, day_of_week, OTB, OTB_to_be_cxl) %>%
      group_by(days_prior, prod_group_cxl_rate, day_of_week) %>%
      summarise(predict_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
a_dp_grp3_wd_test <- hot1_test_3 %>%
      select(days_prior, prod_group_cxl_rate, day_of_week, OTB, OTB_to_be_cxl) %>%
      group_by(days_prior, prod_group_cxl_rate, day_of_week) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
a_dp_grp3_wd_mod <- left_join(a_dp_grp3_wd, a_dp_grp3_wd_test, by = c('days_prior', 'prod_group_cxl_rate', 'day_of_week'))
a_dp_grp3_wd_mod

2.1.3 CXL Rate by groups, days prior, day type

#group 1's CXL Rate by groups, days prior,day type
a_dp_grp1_dtp <- h1_train_1 %>%
      select(days_prior, prod_group_perc, day_type, OTB, OTB_to_be_cxl, month) %>%
      group_by(days_prior, prod_group_perc, day_type) %>%
      summarise(predict_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
a_dp_grp1_dtp_test <- hot1_test_1 %>%
      select(days_prior, prod_group_perc, day_type, OTB, OTB_to_be_cxl, month) %>%
      group_by(days_prior, prod_group_perc, day_type) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
a_dp_grp1_dtp_mod <- left_join(a_dp_grp1_dtp, a_dp_grp1_dtp_test, by = c('days_prior', 'prod_group_perc', 'day_type'))
a_dp_grp1_dtp_mod
#group 2's CXL Rate by groups, days prior,day type
a_dp_grp2_dtp <- h1_train_2 %>%
      select(days_prior, prod_group_behav, day_type, OTB, OTB_to_be_cxl, month) %>%
      group_by(days_prior, prod_group_behav, day_type) %>%
      summarise(predict_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
a_dp_grp2_dtp_test <- hot1_test_2 %>%
      select(days_prior, prod_group_behav, day_type, OTB, OTB_to_be_cxl, month) %>%
      group_by(days_prior, prod_group_behav, day_type) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
a_dp_grp2_dtp_mod <- left_join(a_dp_grp2_dtp, a_dp_grp2_dtp_test, by = c('days_prior', 'prod_group_behav', 'day_type'))
a_dp_grp2_dtp_mod
#group 3's CXL Rate by groups, days prior,day type
a_dp_grp3_dtp <- h1_train_3 %>%
      select(days_prior, prod_group_cxl_rate, day_type, OTB, OTB_to_be_cxl, month) %>%
      group_by(days_prior, prod_group_cxl_rate, day_type) %>%
      summarise(predict_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
a_dp_grp3_dtp_test <- hot1_test_3 %>%
      select(days_prior, prod_group_cxl_rate, day_type, OTB, OTB_to_be_cxl, month) %>%
      group_by(days_prior, prod_group_cxl_rate, day_type) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
a_dp_grp3_dtp_mod <- left_join(a_dp_grp3_dtp, a_dp_grp3_dtp_test, by = c('days_prior', 'prod_group_cxl_rate', 'day_type'))
a_dp_grp3_dtp_mod

2.2 Hotel 2 - NY

2.2.1 CXL Rate and days prior

# calculate cancellation rate just by days prior and 
n_dp <- h2_train %>%
      select(days_prior, OTB, OTB_to_be_cxl, OTB_cxl_rate) %>%
      group_by(days_prior) %>%
      summarise(predict_cxl_rate = mean(OTB_cxl_rate))
n_dp_test <- hot2 %>%
      select(days_prior, OTB, OTB_to_be_cxl, OTB_cxl_rate, OTB_cxl_rate) %>%
      group_by(days_prior) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
n_dp_mod <- left_join(n_dp, n_dp_test, by = 'days_prior')
n_dp_mod

2.2.2 CXL Rate by groups, days prior, and weekdays

# use weekdays and grouping one
# get test data ready
hot2_test_1 <- hot2 %>%
      mutate(prod_group_perc = case_when(product_type == 'OPAQUE' ~ 'OP/FENC/OTH',
                                         product_type == 'FENCED' ~ 'OP/FENC/OTH',
                                         product_type == 'CORPORATE' ~ 'CORP/GROUP/TACT',
                                         product_type == 'BUSINESS TRAVEL AGENCIES' ~ 'UNF/WHOLE/MEM/BUS',
                                         product_type == 'GROUP' ~ 'CORP/GROUP/TACT',
                                         product_type == 'TACTICAL MARKETING' ~ 'CORP/GROUP/TACT',
                                         product_type == 'MEMBERSHIP MARKETING' ~ 'UNF/WHOLE/MEM/BUS',
                                         product_type == 'OTHER' ~ 'OP/FENC/OTH',
                                         product_type == 'UNFENCED' ~ 'UNF/WHOLE/MEM/BUS',
                                         product_type == 'GOVERNMENT' ~ 'GOVERNMENT',
                                         product_type == 'WHOLESALE' ~ 'UNF/WHOLE/MEM/BUS'))
hot2_test_2 <- hot2 %>%
      mutate(prod_group_behav = case_when(product_type == 'OPAQUE' ~ 'OTH/OPA/FEN',
                                         product_type == 'FENCED' ~ 'OTH/OPA/FEN',
                                         product_type == 'CORPORATE' ~ 'CORP/MEMB/WHOLE',
                                         product_type == 'BUSINESS TRAVEL AGENCIES' ~ 'BTA/UNFEN/CORP/MM',
                                         product_type == 'GROUP' ~ 'GROUP',
                                         product_type == 'TACTICAL MARKETING' ~ 'TACT/WHOLE',
                                         product_type == 'MEMBERSHIP MARKETING' ~ 'BTA/UNFEN/CORP/MM',
                                         product_type == 'OTHER' ~ 'OTH/OPA/FEN',
                                         product_type == 'UNFENCED' ~ 'BTA/UNFEN/CORP/MM',
                                         product_type == 'GOVERNMENT' ~ 'GOVERNMENT',
                                         product_type == 'WHOLESALE' ~ 'TACT/WHOLE'))
#group 1's CXL Rate by groups, days prior, and weekdays
n_dp_grp1_wd <- h2_train_1 %>%
      select(days_prior, prod_group_perc, day_of_week, OTB, OTB_to_be_cxl) %>%
      group_by(days_prior, prod_group_perc, day_of_week) %>%
      summarise(predict_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
n_dp_grp1_wd_test <- hot2_test_1 %>%
      select(days_prior, prod_group_perc, day_of_week, OTB, OTB_to_be_cxl) %>%
      group_by(days_prior, prod_group_perc, day_of_week) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
n_dp_grp1_wd_mod <- left_join(n_dp_grp1_wd, n_dp_grp1_wd_test, by = c('days_prior', 'prod_group_perc', 'day_of_week'))
n_dp_grp1_wd_mod
#group 2's CXL Rate by groups, days prior, and weekdays
n_dp_grp2_wd <- h2_train_2 %>%
      select(days_prior, prod_group_behav, day_of_week, OTB, OTB_to_be_cxl) %>%
      group_by(days_prior, prod_group_behav, day_of_week) %>%
      summarise(predict_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
n_dp_grp2_wd_test <- hot2_test_2 %>%
      select(days_prior, prod_group_behav, day_of_week, OTB, OTB_to_be_cxl) %>%
      group_by(days_prior, prod_group_behav, day_of_week) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
n_dp_grp2_wd_mod <- left_join(n_dp_grp2_wd, n_dp_grp2_wd_test, by = c('days_prior', 'prod_group_behav', 'day_of_week'))
n_dp_grp2_wd_mod

2.2.3 CXL Rate by groups, days prior, day type

#group 1's CXL Rate by groups, days prior,day type
n_dp_grp1_dtp <- h2_train_1 %>%
      select(days_prior, prod_group_perc, day_type, OTB, OTB_to_be_cxl, month) %>%
      group_by(days_prior, prod_group_perc, day_type) %>%
      summarise(predict_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
n_dp_grp1_dtp_test <- hot2_test_1 %>%
      select(days_prior, prod_group_perc, day_type, OTB, OTB_to_be_cxl, month) %>%
      group_by(days_prior, prod_group_perc, day_type) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
n_dp_grp1_dtp_mod <- left_join(n_dp_grp1_dtp, n_dp_grp1_dtp_test, by = c('days_prior', 'prod_group_perc', 'day_type'))
n_dp_grp1_dtp_mod
#group 2's CXL Rate by groups, days prior,day type
n_dp_grp2_dtp <- h2_train_2 %>%
      select(days_prior, prod_group_behav, day_type, OTB, OTB_to_be_cxl, month) %>%
      group_by(days_prior, prod_group_behav, day_type) %>%
      summarise(predict_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
n_dp_grp2_dtp_test <- hot2_test_2 %>%
      select(days_prior, prod_group_behav, day_type, OTB, OTB_to_be_cxl, month) %>%
      group_by(days_prior, prod_group_behav, day_type) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))
n_dp_grp2_dtp_mod <- left_join(n_dp_grp2_dtp, n_dp_grp2_dtp_test, by = c('days_prior', 'prod_group_behav', 'day_type'))
n_dp_grp2_dtp_mod

3 Performance Evaluation

3.1 Hotel 1 - ATL

3.1.1 CXL Rate by days prior

# forecast otb to survive
a_dp_fcs <- left_join(hot1, a_dp_mod, by="days_prior") %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
# MAE
a_dp_mae <- a_dp_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
a_dp_mae
# MAPE: the OTB_to_survive has 0 values. So ask Professor
a_dp_mape <- a_dp_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range) %>% 
  summarise(meanAPE = mape(OTB_to_survive, fcst_svv))
a_dp_mape
# MASE: 
## Avg survival rate:
avg_svv_rt <- sum(h1_train$OTB_to_survive)/sum(h1_train$OTB)
## MASE
a_dp_mase <- a_dp_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt*OTB)) %>% 
  group_by(dp_range) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
a_dp_mase

3.1.2 CXL Rate by groups, days prior, and weekdays

3.1.2.1 Grouping method 1

# forecast otb to survive
a_dp_grp1_wd_fcs <- left_join(hot1_test_1, a_dp_grp1_wd_mod, by=c("days_prior", "day_of_week", "prod_group_perc")) %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
a_dp_grp1_wd_fcs
# MAE
a_dp_grp1_wd_mae <- a_dp_grp1_wd_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_of_week, prod_group_perc) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
a_dp_grp1_wd_mae
# MAPE 
# MASE
a_dp_grp1_wd_mase <- a_dp_grp1_wd_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt*OTB)) %>% 
  group_by(dp_range, day_of_week, prod_group_perc) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
a_dp_grp1_wd_mase

3.1.2.2 Grouping method 2

# forecast otb to survive
a_dp_grp2_wd_fcs <- left_join(hot1_test_2, a_dp_grp2_wd_mod, by=c("days_prior", "day_of_week", "prod_group_behav")) %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
a_dp_grp2_wd_fcs
# MAE
a_dp_grp2_wd_mae <- a_dp_grp2_wd_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_of_week, prod_group_behav) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
a_dp_grp2_wd_mae
# MAPE 
# MASE
a_dp_grp2_wd_mase <- a_dp_grp2_wd_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt*OTB)) %>% 
  group_by(dp_range, day_of_week, prod_group_behav) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
a_dp_grp2_wd_mase

3.1.2.3 Grouping method 3

# forecast otb to survive
a_dp_grp3_wd_fcs <- left_join(hot1_test_3, a_dp_grp3_wd_mod, by=c("days_prior", "day_of_week", "prod_group_cxl_rate")) %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
a_dp_grp3_wd_fcs
# MAE
a_dp_grp3_wd_mae <- a_dp_grp3_wd_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_of_week, prod_group_cxl_rate) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
a_dp_grp3_wd_mae
# MAPE 
# MASE
a_dp_grp3_wd_mase <- a_dp_grp3_wd_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt*OTB)) %>% 
  group_by(dp_range, day_of_week, prod_group_cxl_rate) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
a_dp_grp3_wd_mase

3.1.3 CXL Rate by groups, days prior, day type

3.1.3.1 Grouping method 1

# forecast otb to survive
a_dp_grp1_dtp_fcs <- left_join(hot1_test_1, a_dp_grp1_dtp_mod, by=c("days_prior", "day_type", "prod_group_perc")) %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
a_dp_grp1_dtp_fcs
# MAE
a_dp_grp1_dtp_mae <- a_dp_grp1_dtp_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_of_week, prod_group_perc) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
a_dp_grp1_dtp_mae
# MAPE 
# MASE
a_dp_grp1_dtp_mase <- a_dp_grp1_dtp_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt*OTB)) %>% 
  group_by(dp_range, day_of_week, prod_group_perc) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
a_dp_grp1_dtp_mase

3.1.3.2 Grouping method 2

# forecast otb to survive
a_dp_grp2_dtp_fcs <- left_join(hot1_test_2, a_dp_grp2_dtp_mod, by=c("days_prior", "day_type", "prod_group_behav")) %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
a_dp_grp2_dtp_fcs
# MAE
a_dp_grp2_dtp_mae <- a_dp_grp2_dtp_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_type, prod_group_behav) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
a_dp_grp2_dtp_mae
# MAPE 
# MASE
a_dp_grp2_dtp_mase <- a_dp_grp2_dtp_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt*OTB)) %>% 
  group_by(dp_range, day_type, prod_group_behav) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
a_dp_grp2_dtp_mase

3.1.3.3 Grouping method 3

# forecast otb to survive
a_dp_grp3_dtp_fcs <- left_join(hot1_test_3, a_dp_grp3_dtp_mod, by=c("days_prior", "day_type", "prod_group_cxl_rate")) %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
a_dp_grp3_dtp_fcs
# MAE
a_dp_grp3_dtp_mae <- a_dp_grp3_dtp_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_type, prod_group_cxl_rate) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
a_dp_grp3_dtp_mae
# MAPE 
# MASE
a_dp_grp3_dtp_mase <- a_dp_grp3_dtp_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt*OTB)) %>% 
  group_by(dp_range, day_type, prod_group_cxl_rate) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
a_dp_grp3_dtp_mase

3.2 Hotel 2 - ATL

3.2.1 CXL Rate by days prior

# forecast otb to survive
n_dp_fcs <- left_join(hot2, n_dp_mod, by="days_prior") %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
# MAE
n_dp_mae <- n_dp_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
n_dp_mae
# MAPE: the OTB_to_survive doesn't have 0 values.
n_dp_mape <- n_dp_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range) %>% 
  summarise(meanAPE = mape(OTB_to_survive, fcst_svv))
n_dp_mape
# MASE: 
## Avg survival rate:
avg_svv_rt_2 <- sum(h2_train$OTB_to_survive)/sum(h2_train$OTB)
## MASE
n_dp_mase <- n_dp_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt*OTB)) %>% 
  group_by(dp_range) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
n_dp_mase

3.2.2 CXL Rate by groups, days prior, and weekdays

3.2.2.1 Grouping method 1

# forecast otb to survive
n_dp_grp1_wd_fcs <- left_join(hot2_test_1, n_dp_grp1_wd_mod, by=c("days_prior", "day_of_week", "prod_group_perc")) %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
n_dp_grp1_wd_fcs
# MAE
n_dp_grp1_wd_mae <- n_dp_grp1_wd_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_of_week, prod_group_perc) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
n_dp_grp1_wd_mae
# MAPE: the OTB_to_survive doesn't have 0 values.
n_dp_grp1_wd_mape <- n_dp_grp1_wd_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_of_week, prod_group_perc) %>% 
  summarise(meanAPE = mape(OTB_to_survive, fcst_svv))
n_dp_grp1_wd_mape
# MASE
n_dp_grp1_wd_mase <- n_dp_grp1_wd_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt_2*OTB)) %>% 
  group_by(dp_range, day_of_week, prod_group_perc) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
n_dp_grp1_wd_mase

3.2.2.2 Grouping method 2

# forecast otb to survive
n_dp_grp2_wd_fcs <- left_join(hot2_test_2, n_dp_grp2_wd_mod, by=c("days_prior", "day_of_week", "prod_group_behav")) %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
n_dp_grp2_wd_fcs
# MAE
n_dp_grp2_wd_mae <- n_dp_grp2_wd_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_of_week, prod_group_behav) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
n_dp_grp2_wd_mae
# MAPE: the OTB_to_survive doesn't have 0 values.
n_dp_grp2_wd_mape <- n_dp_grp2_wd_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_of_week, prod_group_behav) %>% 
  summarise(meanAPE = mape(OTB_to_survive, fcst_svv))
n_dp_grp2_wd_mape
# MASE
n_dp_grp2_wd_mase <- n_dp_grp2_wd_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt_2*OTB)) %>% 
  group_by(dp_range, day_of_week, prod_group_behav) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
n_dp_grp2_wd_mase

3.2.3 CXL Rate by groups, days prior, day type

3.2.3.1 Grouping method 1

# forecast otb to survive
n_dp_grp1_dtp_fcs <- left_join(hot2_test_1, n_dp_grp1_dtp_mod, by=c("days_prior", "day_type", "prod_group_perc")) %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
n_dp_grp1_dtp_fcs
# MAE
n_dp_grp1_dtp_mae <- n_dp_grp1_dtp_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_of_week, prod_group_perc) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
n_dp_grp1_dtp_mae
# MAPE: the OTB_to_survive doesn't have 0 values.
n_dp_grp1_dtp_mape <- n_dp_grp1_dtp_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_type, prod_group_perc) %>% 
  summarise(meanAPE = mape(OTB_to_survive, fcst_svv))
n_dp_grp1_dtp_mape
# MASE
n_dp_grp1_dtp_mase <- n_dp_grp1_dtp_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt*OTB)) %>% 
  group_by(dp_range, day_of_week, prod_group_perc) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
n_dp_grp1_dtp_mase

3.2.3.2 Grouping method 2

# forecast otb to survive
n_dp_grp2_dtp_fcs <- left_join(hot2_test_2, n_dp_grp2_dtp_mod, by=c("days_prior", "day_type", "prod_group_behav")) %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
n_dp_grp2_dtp_fcs
# MAE
n_dp_grp2_dtp_mae <- n_dp_grp2_dtp_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_type, prod_group_behav) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
n_dp_grp2_dtp_mae
# MAPE: the OTB_to_survive doesn't have 0 values.
n_dp_grp2_dtp_mape <- n_dp_grp2_dtp_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_type, prod_group_behav) %>% 
  summarise(meanAPE = mape(OTB_to_survive, fcst_svv))
n_dp_grp2_dtp_mape
# MASE
n_dp_grp2_dtp_mase <- n_dp_grp2_dtp_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt_2*OTB)) %>% 
  group_by(dp_range, day_type, prod_group_behav) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
n_dp_grp2_dtp_mase
---
title: "Hotel Cancellation - Naive Model"
author: "Caitlin Howansky & Wei Li"
date: "`r format(Sys.time(), '%B %d, %Y')`"
output:
  html_document:
    toc: yes
    toc_depth: '3'
  html_notebook:
    number_sections: yes
    toc: yes
    toc_depth: 3
    toc_float: yes
---

#__Get Data Ready__
##__Repeat the EDA part__
```{r message = FALSE, echo = FALSE, error = FALSE}
# Clear environment of variables and functions
rm(list = ls(all = TRUE)) 
# Clear environmet of packages
if(is.null(sessionInfo()$otherPkgs) == FALSE)lapply(paste("package:", names(sessionInfo()$otherPkgs), sep=""), detach, character.only = TRUE, unload = TRUE)
```

```{r message = FALSE, echo = FALSE, error = FALSE}
library(tidyverse) 
library(dplyr) # joins
#library(janitor) # pretty cross-tabs
library(kableExtra) # pretty html tables
library(formattable)
library(gridExtra)
library(scales)
library(pastecs)
library(GGally)
library(lubridate)
library(directlabels)
library(Metrics)
```

```{r message = FALSE, echo = FALSE, error = FALSE}
# disable scientific notation in R
options(scipen=999)
```

```{r message = FALSE, echo = FALSE, error = FALSE}
# load the data files
hot1 <- read_csv('BOOKINGS_ATLCP.csv')
hot2 <- read_csv('BOOKINGS_NYCHA.csv')

```


```{r}
# change date fields to date
hot1$stay_dt <- as.Date(hot1$stay_dt, "%m/%d/%Y")
hot1$booking_dt <- as.Date(hot1$booking_dt, "%m/%d/%Y")
hot2$stay_dt <- as.Date(hot2$stay_dt, "%m/%d/%Y")
hot2$booking_dt <- as.Date(hot2$booking_dt, "%m/%d/%Y")

#create field for non-numerical dow
hot1$day_of_week <- as.factor(hot1$dow)
hot2$day_of_week <- as.factor(hot2$dow)
```

```{r}
# Reserve last 3 weeks data as Validation Data and the prior are Training Data
h1_train <- hot1[hot1$stay_dt <= (max(hot1$stay_dt) - 21),]
h2_train <- hot2[hot2$stay_dt <= (max(hot2$stay_dt) - 21),]
```

###__Hotel 1 - ATL__
```{r}
#add price and otb_cxl_rate to h1_train

h1_train <-  h1_train %>%
  filter(OTB!=0) %>% 
  mutate(price = OTB_rev/OTB, OTB_cxl_rate = OTB_to_be_cxl/OTB)

h1_train <- h1_train %>%
  filter(days_prior < 33)

h1_train <- h1_train %>%
  mutate(day_type = case_when(day_of_week == '7' ~ 'weekend',
                              day_of_week == '1' ~ 'weekend', 
                              TRUE ~'weekday'))
h1_train$month = month(h1_train$stay_dt)
```

####__Grouping method 1__
```{r}
# make this a group in h1_train
h1_train_1 <- h1_train %>%
      mutate(prod_group_perc = case_when(product_type == 'OPAQUE' ~ 'OP/FENC',
                                         product_type == 'FENCED' ~ 'OP/FENC',
                                         product_type == 'CORPORATE' ~ 'CORP/BUS',
                                         product_type == 'BUSINESS TRAVEL AGENCIES' ~ 'CORP/BUS',
                                         product_type == 'GROUP' ~ 'GROUP',
                                         product_type == 'MEMBERSHIP MARKETING' ~ 'MEM/OTHER',
                                         product_type == 'OTHER' ~ 'MEM/OTHER',
                                         product_type == 'UNFENCED' ~ 'GOV/UNFENC',
                                         product_type == 'GOVERNMENT' ~ 'GOV/UNFENC'))
```

####__Grouping method 2__
```{r}
#Create this group

h1_train_2 <- h1_train %>%
      mutate(prod_group_behav = case_when(product_type == 'OPAQUE' ~ 'OP/FENC/OTH',
                                         product_type == 'FENCED' ~ 'OP/FENC/OTH',
                                         product_type == 'CORPORATE' ~ 'CORPORATE',
                                         product_type == 'BUSINESS TRAVEL AGENCIES' ~ 'GOV/BUS/MEM',
                                         product_type == 'GROUP' ~ 'GROUP',
                                         product_type == 'MEMBERSHIP MARKETING' ~ 'GOV/BUS/MEM',
                                         product_type == 'OTHER' ~ 'OP/FENC/OTH',
                                         product_type == 'UNFENCED' ~ 'UNFENCED',
                                         product_type == 'GOVERNMENT' ~ 'GOV/BUS/MEM'))
```

####__Grouping method 3__
```{r}
#Create this group

h1_train_3 <- h1_train %>%
      mutate(prod_group_cxl_rate = case_when(product_type == 'OPAQUE' ~ 'GOV/UNFEN/OP',
                                         product_type == 'FENCED' ~ 'CORP/FEN/MEM',
                                         product_type == 'CORPORATE' ~ 'CORP/FEN/MEM',
                                         product_type == 'BUSINESS TRAVEL AGENCIES' ~ 'OTH/BTA',
                                         product_type == 'GROUP' ~ 'GROUP',
                                         product_type == 'MEMBERSHIP' ~ 'CORP/FEN/MEM',
                                         product_type == 'OTHER' ~ 'OTH/BTA',
                                         product_type == 'UNFENCED' ~ 'GOV/UNFEN/OP',
                                         product_type == 'GOVERNMENT' ~ 'GOV/UNFEN/OP'))
```


###__Hotel 2 - NY__
```{r}
#add price and otb_cxl_rate to h1_train

h2_train <-  h2_train %>%
  filter(OTB!=0) %>% 
  mutate(price = OTB_rev/OTB, OTB_cxl_rate = OTB_to_be_cxl/OTB)

h2_train <- h2_train %>%
  filter(days_prior < 32)

h2_train <- h2_train %>%
  mutate(day_type = case_when(day_of_week == '7' ~ 'weekend',
                              day_of_week == '1' ~ 'weekend', 
                              TRUE ~'weekday'))
h2_train$month = month(h2_train$stay_dt)
```

####__Grouping Method 1__
```{r}
#make these groups
h2_train_1 <- h2_train %>%
      mutate(prod_group_perc = case_when(product_type == 'OPAQUE' ~ 'OP/FENC/OTH',
                                         product_type == 'FENCED' ~ 'OP/FENC/OTH',
                                         product_type == 'CORPORATE' ~ 'CORP/GROUP/TACT',
                                         product_type == 'BUSINESS TRAVEL AGENCIES' ~ 'UNF/WHOLE/MEM/BUS',
                                         product_type == 'GROUP' ~ 'CORP/GROUP/TACT',
                                         product_type == 'TACTICAL MARKETING' ~ 'CORP/GROUP/TACT',
                                         product_type == 'MEMBERSHIP MARKETING' ~ 'UNF/WHOLE/MEM/BUS',
                                         product_type == 'OTHER' ~ 'OP/FENC/OTH',
                                         product_type == 'UNFENCED' ~ 'UNF/WHOLE/MEM/BUS',
                                         product_type == 'GOVERNMENT' ~ 'GOVERNMENT',
                                         product_type == 'WHOLESALE' ~ 'UNF/WHOLE/MEM/BUS'))
```

####__Grouping Method 2 __
```{r}
#make this a group
h2_train_2 <- h2_train %>%
      mutate(prod_group_behav = case_when(product_type == 'OPAQUE' ~ 'OTH/OPA/FEN',
                                         product_type == 'FENCED' ~ 'OTH/OPA/FEN',
                                         product_type == 'CORPORATE' ~ 'CORP/MEMB/WHOLE',
                                         product_type == 'BUSINESS TRAVEL AGENCIES' ~ 'BTA/UNFEN/CORP/MM',
                                         product_type == 'GROUP' ~ 'GROUP',
                                         product_type == 'TACTICAL MARKETING' ~ 'TACT/WHOLE',
                                         product_type == 'MEMBERSHIP MARKETING' ~ 'BTA/UNFEN/CORP/MM',
                                         product_type == 'OTHER' ~ 'OTH/OPA/FEN',
                                         product_type == 'UNFENCED' ~ 'BTA/UNFEN/CORP/MM',
                                         product_type == 'GOVERNMENT' ~ 'GOVERNMENT',
                                         product_type == 'WHOLESALE' ~ 'TACT/WHOLE'))

```


#__Naive Modeling Begins Here__
```{r}
#get just the last three weeks for test data 
hot1 <-  hot1[hot1$stay_dt >(max(hot1$stay_dt) - 21),]
hot2 <-  hot2[hot2$stay_dt >(max(hot2$stay_dt) - 21),]
```

```{r}
#truncate the days prior for test data
hot1 <- hot1 %>% 
          filter(days_prior < 33)

hot2 <- hot2 %>%
          filter(days_prior < 32)

#add price, cxl rate
hot1<-  hot1 %>%
  filter(OTB!=0) %>% 
  mutate(price = OTB_rev/OTB, OTB_cxl_rate = OTB_to_be_cxl/OTB)

hot2 <-  hot2 %>%
  filter(OTB!=0) %>% 
  mutate(price = OTB_rev/OTB, OTB_cxl_rate = OTB_to_be_cxl/OTB)

#add month
hot1$month = month(hot1$stay_dt)
hot2$month = month(hot2$stay_dt)

#add weekday/weekend
hot1 <- hot1 %>%
  mutate(day_type = case_when(day_of_week == '7' ~ 'weekend',
                              day_of_week == '1' ~ 'weekend', 
                              TRUE ~'weekday'))
hot2 <- hot2 %>%
  mutate(day_type = case_when(day_of_week == '7' ~ 'weekend',
                              day_of_week == '1' ~ 'weekend', 
                              TRUE ~'weekday'))
```


##__Hotel 1 - ATL__
###__CXL Rate by days prior__
```{r}
# calculate cancellation rate just by days prior
a_dp <- h1_train %>%
      select(days_prior, OTB, OTB_to_be_cxl, OTB_cxl_rate) %>%
      group_by(days_prior) %>%
      summarise(predict_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

a_dp_test <- hot1 %>%
      select(days_prior, OTB, OTB_to_be_cxl, OTB_cxl_rate, OTB_cxl_rate) %>%
      group_by(days_prior) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

a_dp_mod <- left_join(a_dp, a_dp_test, by = 'days_prior')
a_dp_mod
```

###__CXL Rate by groups, days prior, and weekdays__
```{r}
# use weekdays and grouping one
# get test data ready

hot1_test_1 <- hot1 %>%
      mutate(prod_group_perc = case_when(product_type == 'OPAQUE' ~ 'OP/FENC',
                                         product_type == 'FENCED' ~ 'OP/FENC',
                                         product_type == 'CORPORATE' ~ 'CORP/BUS',
                                         product_type == 'BUSINESS TRAVEL AGENCIES' ~ 'CORP/BUS',
                                         product_type == 'GROUP' ~ 'GROUP',
                                         product_type == 'MEMBERSHIP MARKETING' ~ 'MEM/OTHER',
                                         product_type == 'OTHER' ~ 'MEM/OTHER',
                                         product_type == 'UNFENCED' ~ 'GOV/UNFENC',
                                         product_type == 'GOVERNMENT' ~ 'GOV/UNFENC'))
hot1_test_2 <- hot1 %>%
      mutate(prod_group_behav = case_when(product_type == 'OPAQUE' ~ 'OP/FENC/OTH',
                                         product_type == 'FENCED' ~ 'OP/FENC/OTH',
                                         product_type == 'CORPORATE' ~ 'CORPORATE',
                                         product_type == 'BUSINESS TRAVEL AGENCIES' ~ 'GOV/BUS/MEM',
                                         product_type == 'GROUP' ~ 'GROUP',
                                         product_type == 'MEMBERSHIP MARKETING' ~ 'GOV/BUS/MEM',
                                         product_type == 'OTHER' ~ 'OP/FENC/OTH',
                                         product_type == 'UNFENCED' ~ 'UNFENCED',
                                         product_type == 'GOVERNMENT' ~ 'GOV/BUS/MEM'))

hot1_test_3 <- hot1 %>%
      mutate(prod_group_cxl_rate = case_when(product_type == 'OPAQUE' ~ 'GOV/UNFEN/OP',
                                         product_type == 'FENCED' ~ 'CORP/FEN/MEM',
                                         product_type == 'CORPORATE' ~ 'CORP/FEN/MEM',
                                         product_type == 'BUSINESS TRAVEL AGENCIES' ~ 'OTH/BTA',
                                         product_type == 'GROUP' ~ 'GROUP',
                                         product_type == 'MEMBERSHIP MARKETING' ~ 'CORP/FEN/MEM',
                                         product_type == 'OTHER' ~ 'OTH/BTA',
                                         product_type == 'UNFENCED' ~ 'GOV/UNFEN/OP',
                                         product_type == 'GOVERNMENT' ~ 'GOV/UNFEN/OP'))
```

```{r}
#group 1's CXL Rate by groups, days prior, and weekdays
a_dp_grp1_wd <- h1_train_1 %>%
      select(days_prior, prod_group_perc, day_of_week, OTB, OTB_to_be_cxl) %>%
      group_by(days_prior, prod_group_perc, day_of_week) %>%
      summarise(predict_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

a_dp_grp1_wd_test <- hot1_test_1 %>%
      select(days_prior, prod_group_perc, day_of_week, OTB, OTB_to_be_cxl) %>%
      group_by(days_prior, prod_group_perc, day_of_week) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

a_dp_grp1_wd_mod <- left_join(a_dp_grp1_wd, a_dp_grp1_wd_test, by = c('days_prior', 'prod_group_perc', 'day_of_week'))
a_dp_grp1_wd_mod
```

```{r}
#group 2's CXL Rate by groups, days prior, and weekdays
a_dp_grp2_wd <- h1_train_2 %>%
      select(days_prior, prod_group_behav, day_of_week, OTB, OTB_to_be_cxl) %>%
      group_by(days_prior, prod_group_behav, day_of_week) %>%
      summarise(predict_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

a_dp_grp2_wd_test <- hot1_test_2 %>%
      select(days_prior, prod_group_behav, day_of_week, OTB, OTB_to_be_cxl) %>%
      group_by(days_prior, prod_group_behav, day_of_week) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

a_dp_grp2_wd_mod <- left_join(a_dp_grp2_wd, a_dp_grp2_wd_test, by = c('days_prior', 'prod_group_behav', 'day_of_week'))
a_dp_grp2_wd_mod
```

```{r}
#group 3 and days prior
a_dp_grp3_wd <- h1_train_3 %>%
      select(days_prior, prod_group_cxl_rate, day_of_week, OTB, OTB_to_be_cxl) %>%
      group_by(days_prior, prod_group_cxl_rate, day_of_week) %>%
      summarise(predict_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

a_dp_grp3_wd_test <- hot1_test_3 %>%
      select(days_prior, prod_group_cxl_rate, day_of_week, OTB, OTB_to_be_cxl) %>%
      group_by(days_prior, prod_group_cxl_rate, day_of_week) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

a_dp_grp3_wd_mod <- left_join(a_dp_grp3_wd, a_dp_grp3_wd_test, by = c('days_prior', 'prod_group_cxl_rate', 'day_of_week'))
a_dp_grp3_wd_mod
```

###__CXL Rate by groups, days prior, day type__

```{r}
#group 1's CXL Rate by groups, days prior,day type
a_dp_grp1_dtp <- h1_train_1 %>%
      select(days_prior, prod_group_perc, day_type, OTB, OTB_to_be_cxl, month) %>%
      group_by(days_prior, prod_group_perc, day_type) %>%
      summarise(predict_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

a_dp_grp1_dtp_test <- hot1_test_1 %>%
      select(days_prior, prod_group_perc, day_type, OTB, OTB_to_be_cxl, month) %>%
      group_by(days_prior, prod_group_perc, day_type) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

a_dp_grp1_dtp_mod <- left_join(a_dp_grp1_dtp, a_dp_grp1_dtp_test, by = c('days_prior', 'prod_group_perc', 'day_type'))
a_dp_grp1_dtp_mod
```

```{r}
#group 2's CXL Rate by groups, days prior,day type
a_dp_grp2_dtp <- h1_train_2 %>%
      select(days_prior, prod_group_behav, day_type, OTB, OTB_to_be_cxl, month) %>%
      group_by(days_prior, prod_group_behav, day_type) %>%
      summarise(predict_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

a_dp_grp2_dtp_test <- hot1_test_2 %>%
      select(days_prior, prod_group_behav, day_type, OTB, OTB_to_be_cxl, month) %>%
      group_by(days_prior, prod_group_behav, day_type) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

a_dp_grp2_dtp_mod <- left_join(a_dp_grp2_dtp, a_dp_grp2_dtp_test, by = c('days_prior', 'prod_group_behav', 'day_type'))
a_dp_grp2_dtp_mod
```

```{r}
#group 3's CXL Rate by groups, days prior,day type
a_dp_grp3_dtp <- h1_train_3 %>%
      select(days_prior, prod_group_cxl_rate, day_type, OTB, OTB_to_be_cxl, month) %>%
      group_by(days_prior, prod_group_cxl_rate, day_type) %>%
      summarise(predict_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

a_dp_grp3_dtp_test <- hot1_test_3 %>%
      select(days_prior, prod_group_cxl_rate, day_type, OTB, OTB_to_be_cxl, month) %>%
      group_by(days_prior, prod_group_cxl_rate, day_type) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

a_dp_grp3_dtp_mod <- left_join(a_dp_grp3_dtp, a_dp_grp3_dtp_test, by = c('days_prior', 'prod_group_cxl_rate', 'day_type'))
a_dp_grp3_dtp_mod
```


##__Hotel 2 - NY__
###__CXL Rate and days prior__
```{r}
# calculate cancellation rate just by days prior and 
n_dp <- h2_train %>%
      select(days_prior, OTB, OTB_to_be_cxl, OTB_cxl_rate) %>%
      group_by(days_prior) %>%
      summarise(predict_cxl_rate = mean(OTB_cxl_rate))

n_dp_test <- hot2 %>%
      select(days_prior, OTB, OTB_to_be_cxl, OTB_cxl_rate, OTB_cxl_rate) %>%
      group_by(days_prior) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

n_dp_mod <- left_join(n_dp, n_dp_test, by = 'days_prior')
n_dp_mod
```

###__CXL Rate by groups, days prior, and weekdays__
```{r}
# use weekdays and grouping one
# get test data ready

hot2_test_1 <- hot2 %>%
      mutate(prod_group_perc = case_when(product_type == 'OPAQUE' ~ 'OP/FENC/OTH',
                                         product_type == 'FENCED' ~ 'OP/FENC/OTH',
                                         product_type == 'CORPORATE' ~ 'CORP/GROUP/TACT',
                                         product_type == 'BUSINESS TRAVEL AGENCIES' ~ 'UNF/WHOLE/MEM/BUS',
                                         product_type == 'GROUP' ~ 'CORP/GROUP/TACT',
                                         product_type == 'TACTICAL MARKETING' ~ 'CORP/GROUP/TACT',
                                         product_type == 'MEMBERSHIP MARKETING' ~ 'UNF/WHOLE/MEM/BUS',
                                         product_type == 'OTHER' ~ 'OP/FENC/OTH',
                                         product_type == 'UNFENCED' ~ 'UNF/WHOLE/MEM/BUS',
                                         product_type == 'GOVERNMENT' ~ 'GOVERNMENT',
                                         product_type == 'WHOLESALE' ~ 'UNF/WHOLE/MEM/BUS'))
hot2_test_2 <- hot2 %>%
      mutate(prod_group_behav = case_when(product_type == 'OPAQUE' ~ 'OTH/OPA/FEN',
                                         product_type == 'FENCED' ~ 'OTH/OPA/FEN',
                                         product_type == 'CORPORATE' ~ 'CORP/MEMB/WHOLE',
                                         product_type == 'BUSINESS TRAVEL AGENCIES' ~ 'BTA/UNFEN/CORP/MM',
                                         product_type == 'GROUP' ~ 'GROUP',
                                         product_type == 'TACTICAL MARKETING' ~ 'TACT/WHOLE',
                                         product_type == 'MEMBERSHIP MARKETING' ~ 'BTA/UNFEN/CORP/MM',
                                         product_type == 'OTHER' ~ 'OTH/OPA/FEN',
                                         product_type == 'UNFENCED' ~ 'BTA/UNFEN/CORP/MM',
                                         product_type == 'GOVERNMENT' ~ 'GOVERNMENT',
                                         product_type == 'WHOLESALE' ~ 'TACT/WHOLE'))

```

```{r}
#group 1's CXL Rate by groups, days prior, and weekdays
n_dp_grp1_wd <- h2_train_1 %>%
      select(days_prior, prod_group_perc, day_of_week, OTB, OTB_to_be_cxl) %>%
      group_by(days_prior, prod_group_perc, day_of_week) %>%
      summarise(predict_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

n_dp_grp1_wd_test <- hot2_test_1 %>%
      select(days_prior, prod_group_perc, day_of_week, OTB, OTB_to_be_cxl) %>%
      group_by(days_prior, prod_group_perc, day_of_week) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

n_dp_grp1_wd_mod <- left_join(n_dp_grp1_wd, n_dp_grp1_wd_test, by = c('days_prior', 'prod_group_perc', 'day_of_week'))
n_dp_grp1_wd_mod
```

```{r}
#group 2's CXL Rate by groups, days prior, and weekdays
n_dp_grp2_wd <- h2_train_2 %>%
      select(days_prior, prod_group_behav, day_of_week, OTB, OTB_to_be_cxl) %>%
      group_by(days_prior, prod_group_behav, day_of_week) %>%
      summarise(predict_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

n_dp_grp2_wd_test <- hot2_test_2 %>%
      select(days_prior, prod_group_behav, day_of_week, OTB, OTB_to_be_cxl) %>%
      group_by(days_prior, prod_group_behav, day_of_week) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

n_dp_grp2_wd_mod <- left_join(n_dp_grp2_wd, n_dp_grp2_wd_test, by = c('days_prior', 'prod_group_behav', 'day_of_week'))
n_dp_grp2_wd_mod
```

###__CXL Rate by groups, days prior, day type__

```{r}
#group 1's CXL Rate by groups, days prior,day type
n_dp_grp1_dtp <- h2_train_1 %>%
      select(days_prior, prod_group_perc, day_type, OTB, OTB_to_be_cxl, month) %>%
      group_by(days_prior, prod_group_perc, day_type) %>%
      summarise(predict_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

n_dp_grp1_dtp_test <- hot2_test_1 %>%
      select(days_prior, prod_group_perc, day_type, OTB, OTB_to_be_cxl, month) %>%
      group_by(days_prior, prod_group_perc, day_type) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

n_dp_grp1_dtp_mod <- left_join(n_dp_grp1_dtp, n_dp_grp1_dtp_test, by = c('days_prior', 'prod_group_perc', 'day_type'))
n_dp_grp1_dtp_mod
```

```{r}
#group 2's CXL Rate by groups, days prior,day type
n_dp_grp2_dtp <- h2_train_2 %>%
      select(days_prior, prod_group_behav, day_type, OTB, OTB_to_be_cxl, month) %>%
      group_by(days_prior, prod_group_behav, day_type) %>%
      summarise(predict_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

n_dp_grp2_dtp_test <- hot2_test_2 %>%
      select(days_prior, prod_group_behav, day_type, OTB, OTB_to_be_cxl, month) %>%
      group_by(days_prior, prod_group_behav, day_type) %>%
      summarise(true_cxl_rate = sum(OTB_to_be_cxl)/sum(OTB))

n_dp_grp2_dtp_mod <- left_join(n_dp_grp2_dtp, n_dp_grp2_dtp_test, by = c('days_prior', 'prod_group_behav', 'day_type'))
n_dp_grp2_dtp_mod
```

#__Performance Evaluation__
##__Hotel 1 - ATL__
###__CXL Rate by days prior__
```{r}
# forecast otb to survive
a_dp_fcs <- left_join(hot1, a_dp_mod, by="days_prior") %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
```

```{r}
# MAE
a_dp_mae <- a_dp_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
a_dp_mae
```
```{r}
# MAPE: the OTB_to_survive has 0 values. So ask Professor
a_dp_mape <- a_dp_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range) %>% 
  summarise(meanAPE = mape(OTB_to_survive, fcst_svv))
a_dp_mape
```

```{r}
# MASE: 
## Avg survival rate:
avg_svv_rt <- sum(h1_train$OTB_to_survive)/sum(h1_train$OTB)
## MASE
a_dp_mase <- a_dp_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt*OTB)) %>% 
  group_by(dp_range) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
a_dp_mase
```

###__CXL Rate by groups, days prior, and weekdays__
#### Grouping method 1
```{r}
# forecast otb to survive
a_dp_grp1_wd_fcs <- left_join(hot1_test_1, a_dp_grp1_wd_mod, by=c("days_prior", "day_of_week", "prod_group_perc")) %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
a_dp_grp1_wd_fcs
```

```{r}
# MAE
a_dp_grp1_wd_mae <- a_dp_grp1_wd_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_of_week, prod_group_perc) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
a_dp_grp1_wd_mae
```

```{r}
# MAPE 
```

```{r}
# MASE
a_dp_grp1_wd_mase <- a_dp_grp1_wd_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt*OTB)) %>% 
  group_by(dp_range, day_of_week, prod_group_perc) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
a_dp_grp1_wd_mase
```

#### Grouping method 2

```{r}
# forecast otb to survive
a_dp_grp2_wd_fcs <- left_join(hot1_test_2, a_dp_grp2_wd_mod, by=c("days_prior", "day_of_week", "prod_group_behav")) %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
a_dp_grp2_wd_fcs
```

```{r}
# MAE
a_dp_grp2_wd_mae <- a_dp_grp2_wd_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_of_week, prod_group_behav) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
a_dp_grp2_wd_mae
```

```{r}
# MAPE 
```

```{r}
# MASE
a_dp_grp2_wd_mase <- a_dp_grp2_wd_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt*OTB)) %>% 
  group_by(dp_range, day_of_week, prod_group_behav) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
a_dp_grp2_wd_mase
```

#### Grouping method 3
```{r}
# forecast otb to survive
a_dp_grp3_wd_fcs <- left_join(hot1_test_3, a_dp_grp3_wd_mod, by=c("days_prior", "day_of_week", "prod_group_cxl_rate")) %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
a_dp_grp3_wd_fcs
```


```{r}
# MAE
a_dp_grp3_wd_mae <- a_dp_grp3_wd_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_of_week, prod_group_cxl_rate) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
a_dp_grp3_wd_mae
```

```{r}
# MAPE 
```

```{r}
# MASE
a_dp_grp3_wd_mase <- a_dp_grp3_wd_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt*OTB)) %>% 
  group_by(dp_range, day_of_week, prod_group_cxl_rate) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
a_dp_grp3_wd_mase
```


###__CXL Rate by groups, days prior, day type__
#### Grouping method 1
```{r}
# forecast otb to survive
a_dp_grp1_dtp_fcs <- left_join(hot1_test_1, a_dp_grp1_dtp_mod, by=c("days_prior", "day_type", "prod_group_perc")) %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
a_dp_grp1_dtp_fcs
```
```{r}
# MAE
a_dp_grp1_dtp_mae <- a_dp_grp1_dtp_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_of_week, prod_group_perc) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
a_dp_grp1_dtp_mae
```

```{r}
# MAPE 
```

```{r}
# MASE
a_dp_grp1_dtp_mase <- a_dp_grp1_dtp_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt*OTB)) %>% 
  group_by(dp_range, day_of_week, prod_group_perc) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
a_dp_grp1_dtp_mase
```

#### Grouping method 2

```{r}
# forecast otb to survive
a_dp_grp2_dtp_fcs <- left_join(hot1_test_2, a_dp_grp2_dtp_mod, by=c("days_prior", "day_type", "prod_group_behav")) %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
a_dp_grp2_dtp_fcs
```

```{r}
# MAE
a_dp_grp2_dtp_mae <- a_dp_grp2_dtp_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_type, prod_group_behav) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
a_dp_grp2_dtp_mae
```

```{r}
# MAPE 
```

```{r}
# MASE
a_dp_grp2_dtp_mase <- a_dp_grp2_dtp_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt*OTB)) %>% 
  group_by(dp_range, day_type, prod_group_behav) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
a_dp_grp2_dtp_mase
```

#### Grouping method 3
```{r}
# forecast otb to survive
a_dp_grp3_dtp_fcs <- left_join(hot1_test_3, a_dp_grp3_dtp_mod, by=c("days_prior", "day_type", "prod_group_cxl_rate")) %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
a_dp_grp3_dtp_fcs
```


```{r}
# MAE
a_dp_grp3_dtp_mae <- a_dp_grp3_dtp_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_type, prod_group_cxl_rate) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
a_dp_grp3_dtp_mae
```

```{r}
# MAPE 
```

```{r}
# MASE
a_dp_grp3_dtp_mase <- a_dp_grp3_dtp_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt*OTB)) %>% 
  group_by(dp_range, day_type, prod_group_cxl_rate) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
a_dp_grp3_dtp_mase
```



##__Hotel 2 - ATL__
###__CXL Rate by days prior__
```{r}
# forecast otb to survive
n_dp_fcs <- left_join(hot2, n_dp_mod, by="days_prior") %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
```

```{r}
# MAE
n_dp_mae <- n_dp_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
n_dp_mae
```

```{r}
# MAPE: the OTB_to_survive doesn't have 0 values.
n_dp_mape <- n_dp_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range) %>% 
  summarise(meanAPE = mape(OTB_to_survive, fcst_svv))
n_dp_mape
```

```{r}
# MASE: 
## Avg survival rate:
avg_svv_rt_2 <- sum(h2_train$OTB_to_survive)/sum(h2_train$OTB)
## MASE
n_dp_mase <- n_dp_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt*OTB)) %>% 
  group_by(dp_range) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
n_dp_mase
```

###__CXL Rate by groups, days prior, and weekdays__
#### Grouping method 1
```{r}
# forecast otb to survive
n_dp_grp1_wd_fcs <- left_join(hot2_test_1, n_dp_grp1_wd_mod, by=c("days_prior", "day_of_week", "prod_group_perc")) %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
n_dp_grp1_wd_fcs
```

```{r}
# MAE
n_dp_grp1_wd_mae <- n_dp_grp1_wd_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_of_week, prod_group_perc) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
n_dp_grp1_wd_mae
```

```{r}
# MAPE: the OTB_to_survive doesn't have 0 values.
n_dp_grp1_wd_mape <- n_dp_grp1_wd_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_of_week, prod_group_perc) %>% 
  summarise(meanAPE = mape(OTB_to_survive, fcst_svv))
n_dp_grp1_wd_mape

```

```{r}
# MASE
n_dp_grp1_wd_mase <- n_dp_grp1_wd_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt_2*OTB)) %>% 
  group_by(dp_range, day_of_week, prod_group_perc) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
n_dp_grp1_wd_mase
```

#### Grouping method 2

```{r}
# forecast otb to survive
n_dp_grp2_wd_fcs <- left_join(hot2_test_2, n_dp_grp2_wd_mod, by=c("days_prior", "day_of_week", "prod_group_behav")) %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
n_dp_grp2_wd_fcs
```



```{r}
# MAE
n_dp_grp2_wd_mae <- n_dp_grp2_wd_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_of_week, prod_group_behav) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
n_dp_grp2_wd_mae
```

```{r}
# MAPE: the OTB_to_survive doesn't have 0 values.
n_dp_grp2_wd_mape <- n_dp_grp2_wd_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_of_week, prod_group_behav) %>% 
  summarise(meanAPE = mape(OTB_to_survive, fcst_svv))
n_dp_grp2_wd_mape

```

```{r}
# MASE
n_dp_grp2_wd_mase <- n_dp_grp2_wd_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt_2*OTB)) %>% 
  group_by(dp_range, day_of_week, prod_group_behav) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
n_dp_grp2_wd_mase
```

###__CXL Rate by groups, days prior, day type__
#### Grouping method 1
```{r}
# forecast otb to survive
n_dp_grp1_dtp_fcs <- left_join(hot2_test_1, n_dp_grp1_dtp_mod, by=c("days_prior", "day_type", "prod_group_perc")) %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
n_dp_grp1_dtp_fcs
```
```{r}
# MAE
n_dp_grp1_dtp_mae <- n_dp_grp1_dtp_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_of_week, prod_group_perc) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
n_dp_grp1_dtp_mae
```

```{r}
# MAPE: the OTB_to_survive in GOVERNMENT contains 0 values, so MAPE is INF
n_dp_grp1_dtp_mape <- n_dp_grp1_dtp_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_type, prod_group_perc) %>% 
  summarise(meanAPE = mape(OTB_to_survive, fcst_svv))
n_dp_grp1_dtp_mape


```

```{r}
# MASE
n_dp_grp1_dtp_mase <- n_dp_grp1_dtp_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt*OTB)) %>% 
  group_by(dp_range, day_of_week, prod_group_perc) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
n_dp_grp1_dtp_mase
```

#### Grouping method 2
```{r}
# forecast otb to survive
n_dp_grp2_dtp_fcs <- left_join(hot2_test_2, n_dp_grp2_dtp_mod, by=c("days_prior", "day_type", "prod_group_behav")) %>% 
  mutate(fcst_svv = OTB-OTB*predict_cxl_rate)
n_dp_grp2_dtp_fcs
```


```{r}
# MAE
n_dp_grp2_dtp_mae <- n_dp_grp2_dtp_fcs %>% 
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_type, prod_group_behav) %>% 
  summarise(meanAE = mae(OTB_to_survive, fcst_svv))
n_dp_grp2_dtp_mae
```

```{r}
# MAPE: the OTB_to_survive in GOVERNMENT contains 0 values, so MAPE is INF
n_dp_grp2_dtp_mape <- n_dp_grp2_dtp_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32')) %>% 
  group_by(dp_range, day_type, prod_group_behav) %>% 
  summarise(meanAPE = mape(OTB_to_survive, fcst_svv))
n_dp_grp2_dtp_mape

```

```{r}
# MASE
n_dp_grp2_dtp_mase <- n_dp_grp2_dtp_fcs %>%  
  filter(days_prior!=0) %>% 
  mutate(dp_range = case_when(days_prior >=1 & days_prior<=7 ~ '1_7',
                                  days_prior >=8 & days_prior<=14 ~ '8_14',
                                  days_prior >=15 & days_prior<=21 ~ '15_21',
                                  days_prior >=22 & days_prior<=28 ~ '22_28',
                                  days_prior >=29 ~ '29_32'),
         abs_diff_1=abs(OTB_to_survive-fcst_svv),
         abs_diff_2=abs(OTB_to_survive-avg_svv_rt_2*OTB)) %>% 
  group_by(dp_range, day_type, prod_group_behav) %>% 
  summarise(meanASE = sum(abs_diff_1)/sum(abs_diff_2))
n_dp_grp2_dtp_mase
```